package com.shihb.esjddemo.util;

import com.shihb.esjddemo.pojo.Content;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;

/**
 * Fetches a JD search result page with Jsoup and extracts product entries from it.
 *
 * @author shihb
 * @version 1.0.0
 * @date 2020/4/17 17:27
 */
@Component
public class HtmlParseUtil {

  /** Search URL prefix; the URL-encoded keyword is appended to it. */
  private static final String SEARCH_URL_PREFIX = "https://search.jd.com/Search?keyword=";

  /** Timeout, in milliseconds, handed to Jsoup when fetching the page. */
  private static final int TIMEOUT_MILLIS = 30000;

  /** Id of the element whose {@code <li>} descendants are read as product entries. */
  private static final String GOODS_LIST_ID = "J_goodsList";

  /**
   * Searches JD for the given keyword and turns every {@code <li>} found under the goods-list
   * element into a {@link Content}.
   *
   * <p>The keyword is URL-encoded as UTF-8 before being placed in the query string, so keywords
   * containing spaces, reserved characters or non-ASCII text produce a valid request.
   *
   * @param keyWord the search keyword; must not be {@code null}
   * @return a mutable list with one {@link Content} (image, price, title) per {@code <li>}; empty
   *     when the page has no element with id {@code J_goodsList}
   * @throws IllegalArgumentException if {@code keyWord} is {@code null}
   * @throws Exception if the page cannot be fetched or parsed
   */
  public List<Content> parseJd(String keyWord) throws Exception {
    if (keyWord == null) {
      throw new IllegalArgumentException("keyWord must not be null");
    }

    // Encode the keyword so it cannot break or alter the query string.
    String encodedKeyWord = URLEncoder.encode(keyWord, StandardCharsets.UTF_8.name());
    String url = SEARCH_URL_PREFIX + encodedKeyWord;

    // Fetch the page and parse it into a DOM.
    Document dom = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

    // getElementById returns null when the id is absent; treat that as "no results"
    // instead of failing with a NullPointerException.
    Element goodsList = dom.getElementById(GOODS_LIST_ID);
    if (goodsList == null) {
      return new ArrayList<>();
    }

    // NOTE(review): getElementsByTag also matches <li> elements nested deeper in the tree; if the
    // page nests lists inside a product entry this yields extra entries - confirm against the
    // live markup.
    Elements items = goodsList.getElementsByTag("li");
    List<Content> contents = new ArrayList<>(items.size());
    for (Element item : items) {
      // Images are lazy-loaded, so the image URL is read from the lazy-load attribute.
      String img = item.getElementsByTag("img").eq(0).attr("source-data-lazy-img");
      String price = item.getElementsByClass("p-price").eq(0).text();
      String title = item.getElementsByClass("p-name").eq(0).text();
      contents.add(new Content(img, price, title));
    }
    return contents;
  }

}
